
*---------------------------------David's folders-------------------
* Machine-specific locations. They are only assigned when the current user
* name is one of the accounts listed below; for every other user the globals
* are left exactly as they were before this file started.
if inlist("`c(username)'", "da334", "David", "Atkin", "dga", "atkin") {
	* scratch area; the working folder used further down lives underneath it
	global dump "C:/Scratch"
	* root of the project tree; the two globals after stataloc are built from it
	global dropbox "C:/Work/Engel_GFT"
	global stataloc "C:/Dropbox/Stata15/StataMP-64"
	global codeloc "$dropbox/replication_files/do_files"
	global output "$dropbox/replication_files/data/intermediate_data/conventional_price_indices"
}
* The "dga" account overrides the scratch area with a different drive.
if "`c(username)'"=="dga" {
	global dump "G:/Scratch"
}
*-------------------------------------------------------------------



* Reset the session before anything is loaded (set maxvar needs an empty dataset).
clear all
set maxvar 32000
set more off

* Stop early with a readable message when the path globals were never set.
* Without this check every path below collapses to a root-relative path
* (for example "/Engel1/") and the failure only shows up later as a
* confusing cd / file-not-found error.
if "${dump}"=="" | "${dropbox}"=="" {
	display as error "globals dump and/or dropbox are empty for user `c(username)' -- set them before running this do-file"
	exit 198
}


***************************************************************************************
* Version number of the Engel-curve folder; used in folder and file names below.
local foldno "1"
* Suffix used in the "_bs" part of the per-market input file names.
local bs="0"

*Set to 1 if running step 1 (prices):
local prices_file 1

*Set to 1 if running step 2 (Engel slopes):
local slopes_file 1

*Set to 1 if demeaning prices, to 0 if not demeaning prices:
local demean 1

*G version
local Gversion V5_iV1_gV3_ueshares_smooth




* same price data and Engel curves as for v5:
global new_input_price "$dropbox/replication_files/data/intermediate_data/conventional_price_indices" 

global new_input_engel "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/"

* but new folder:
global new_output "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/"


***************************************************************************************


* working folder (relative file names used later resolve against it):
cd "${dump}/Engel`foldno'/"




***************************************************************************************
* Bring in expenditure shares and local slopes of engel curves
***************************************************************************************
* Builds shares_and_slopes_V`foldno': one row per market x item x decile with
* the expenditure shares and the (smoothed) inverse Engel slopes of both rounds.


* Seed the accumulator file with the market_id column only.
* NOTE(review): presumably every row of this file has market_id 222, so the
* drop leaves an empty dataset to append to -- confirm.
use "Engel_g25_rshare_V1_DM_G108_15_222_bs`bs'.dta", clear
keep market_id
drop if market_id == 222
save shares_and_slopes_V`foldno', replace

* Looping across markets:
forvalues m=1(1)445 {
	* markets whose file cannot be loaded are skipped silently
	cap use "Engel_g25_rshare_V1_DM_G108_15_`m'_bs`bs'.dta", clear
	if _rc==0 {
		display "Market `m'"

		keep market_id sector state43 district43 percentile r43lp* r43predict_lnmpcew r55lp* r55predict_lnmpcew

		order market_id sector state43 district43 percentile r43predict_lnmpcew r55predict_lnmpcew r43lp_wbwg25_rshare_* r55lp_wbwg25_rshare_* 
		* the neighbour references [_n-1] / [_n+1] below rely on this ordering
		sort  market_id sector state43 district43 percentile 
		count


		** Re-Creating inverse slopes:
		foreach round in r43 r55 {
			local y `round'predict_lnmpcew
			forvalues i=1/34 {
				local s `round'lp_wbwg25_rshare_`i'
				local inv invslope_ini_`round'_`i'

				* LOG income w.r.t expenditure shares in LEVELS:
				* _low uses the previous row, _up the next row
				qui gen `inv'_low = (`y' - `y'[_n-1])/(`s' - `s'[_n-1])
				qui gen `inv'_up = (`y' - `y'[_n+1])/(`s' - `s'[_n+1])
				* keep whichever one-sided slope is smaller in absolute value ...
				qui gen `inv' = `inv'_up if abs(`inv'_up) <= abs(`inv'_low)
				qui replace `inv' = `inv'_low if abs(`inv'_low) < abs(`inv'_up)
				* ... and blank it out when the two sides disagree in sign
				qui replace `inv' = . if `inv'_low * `inv'_up < 0
				qui drop `inv'_low `inv'_up
			}
			* end of loop across goods
		}
		* end of loop across rounds


		* keep only percentiles that are exact multiples of 10
		qui keep if mod(percentile, 10) == 0
		rename percentile decile

		*sum invelasticity_*

		* one row per item instead of one column per item
		reshape long invslope_ini_r43_ invslope_ini_r55_ r43lp_wbwg25_rshare_ r55lp_wbwg25_rshare_, i(market_id sector state43 district43 decile) j(item_id)

		rename invslope_ini_r43_ invslope_ini_r43
		rename invslope_ini_r55_ invslope_ini_r55
		rename r43lp_wbwg25_rshare_ r43lp_wbwg25_rshare
		rename r55lp_wbwg25_rshare_ r55lp_wbwg25_rshare

		* stack this market on top of everything accumulated so far
		append using shares_and_slopes_V`foldno'
		save shares_and_slopes_V`foldno', replace

	}
	*end of if rc statement
}
* End of loop across markets

use shares_and_slopes_V`foldno', clear
keep if sector==1
drop if decile == 0 | decile == 100

** Smoothing slopes:
* Moving average over neighbouring deciles. A neighbour is only used when it
* belongs to the same market AND the same item, so the average can never mix
* values from two different markets.
sort market_id item_id decile
foreach year in 43 55 {
	local v invslope_ini_r`year'
	* three-point average when both neighbours are available
	gen x_`year'=(`v'[_n-1]+`v'+`v'[_n+1])/3 if market_id==market_id[_n-1] & market_id==market_id[_n+1] & item_id==item_id[_n-1] & item_id==item_id[_n+1]  & item_id!=.
	* otherwise two-point average with the previous, then with the next decile
	replace x_`year'=(`v'[_n-1]+`v')/2 if market_id==market_id[_n-1] & item_id==item_id[_n-1] & x_`year'==.
	replace x_`year'=(`v'[_n+1]+`v')/2 if market_id==market_id[_n+1] & item_id==item_id[_n+1] & x_`year'==.
	* last resort: the unsmoothed value itself
	replace x_`year'=`v' if  x_`year'==.
	drop `v'
	rename x_`year' `v'
}
*

save shares_and_slopes_V`foldno', replace




***************************************************************************************
*(2) Bring in Flags and local slopes of engel curves 
***************************************************************************************


* 2a) generate slopes only for monotonic curves
* NEW  extrapolate for non-overlapping cases!
* 2b) generate smoother slopes


*cen stands for restricting to monotonic. But it was only done for P. So create for T and S.
*NOTE: P is the price index change, S is slope at origin for that decile and T is slope at crossing point

use "${new_input_engel}/temp_rindexes_g25_rshare_V1_DM_G108_15_destslope_4orthog.dta", clear
sort market_id item_decile 

* generate slopes only for monotonic curves:
* (the "ce" slope is copied wherever the matching "cen" price index is non-missing)
gen T4355lp_wbwg25cen_rshare = T4355lp_wbwg25ce_rshare if P4355lp_wbwg25cen_rshare!=.
gen T5543lp_wbwg25cen_rshare = T5543lp_wbwg25ce_rshare if P5543lp_wbwg25cen_rshare!=.
*NOTE: P is the price index change, S is slope at origin for that decile and T is slope at crossing point
*new version: rshare instead of eshare




*********************************************



*Generate smooth slopes, first for "cen" and then for "ce":
* Moving average over neighbouring deciles. A neighbour is only used when it
* belongs to the same market AND the same item, so the average can never mix
* values from two different markets.
sort market_id item_id decile

sum T4355lp_wbwg25ce_rshare T5543lp_wbwg25ce_rshare
 
foreach variant in wbwg25cen_rshare wbwg25ce_rshare {
	foreach year in 4355 5543 {
		local v T`year'lp_`variant'
		* three-point average when both neighbours are available
		gen x_T`year'=(`v'[_n-1]+`v'+`v'[_n+1])/3 if market_id==market_id[_n-1] & market_id==market_id[_n+1] & item_id==item_id[_n-1] & item_id==item_id[_n+1]  & item_id!=.
		* otherwise two-point average with the previous, then with the next decile
		replace x_T`year'=(`v'[_n-1]+`v')/2 if market_id==market_id[_n-1] & item_id==item_id[_n-1] & x_T`year'==.
		replace x_T`year'=(`v'[_n+1]+`v')/2 if market_id==market_id[_n+1] & item_id==item_id[_n+1] & x_T`year'==.
		* last resort: the unsmoothed value itself
		replace x_T`year'=`v' if  x_T`year'==.
		drop `v'
		rename x_T`year' `v'
	}
}
*


* Slopes to be extrapolated: the (already smoothed) "ce" slope, kept only
* where the nP flag equals 1.
gen T4355lp_extrapolated = T4355lp_wbwg25ce_rshare if nP4355lp_wbwg25cen_rshare==1
gen T5543lp_extrapolated = T5543lp_wbwg25ce_rshare if nP5543lp_wbwg25cen_rshare==1


* same smoothing for "extrapolated" :
foreach year in 4355 5543 {
	local v T`year'lp_extrapolated
	gen x_T`year'=(`v'[_n-1]+`v'+`v'[_n+1])/3 if market_id==market_id[_n-1] & market_id==market_id[_n+1] & item_id==item_id[_n-1] & item_id==item_id[_n+1]  & item_id!=.
	replace x_T`year'=(`v'[_n-1]+`v')/2 if market_id==market_id[_n-1] & item_id==item_id[_n-1] & x_T`year'==.
	replace x_T`year'=(`v'[_n+1]+`v')/2 if market_id==market_id[_n+1] & item_id==item_id[_n+1] & x_T`year'==.
	replace x_T`year'=`v' if  x_T`year'==.
	drop `v'
	rename x_T`year' `v'
}
*



* Fill deciles outside the observed range with the slope at the nearest
* observed end (lowest / highest decile with a non-missing slope).
foreach yr in 4355 5543 {

	* lowest / highest decile with a slope, per market and item; the sentinel
	* values 100 and 0 mark "no slope at all" and are turned into missing
	egen mindec = min(decile * (T`yr'lp_extrapolated != .) + 100 * (T`yr'lp_extrapolated == .)), by(market_id item_id)
	egen maxdec = max(decile * (T`yr'lp_extrapolated != .)), by(market_id item_id)
	replace mindec = . if mindec == 100
	replace maxdec = . if maxdec == 0
	tab mindec 
	tab maxdec

	* ok, no missing observations inbetween min and max deciles:
	count if T`yr'lp_extrapolated == . & decile > mindec & decile < maxdec & nP`yr'lp_wbwg25cen_rshare==1
	count if mindec != . & maxdec == .
	count if maxdec != . & mindec == .

	* slope observed at the boundary decile, spread over the whole market-item group
	egen mindec_slope = total(T`yr'lp_extrapolated * (decile == mindec)) if mindec != ., by(market_id item_id)
	egen maxdec_slope = total(T`yr'lp_extrapolated * (decile == maxdec)) if maxdec != ., by(market_id item_id)

	replace T`yr'lp_extrapolated = mindec_slope if decile < mindec & mindec != . 
	replace T`yr'lp_extrapolated = maxdec_slope if decile > maxdec & maxdec != . 

	count if T`yr'lp_extrapolated == . & nP`yr'lp_wbwg25cen_rshare==1
	drop mindec* maxdec*
}
* end loop across years




sum T4355lp_wbwg25ce_rshare T5543lp_wbwg25ce_rshare

save "$new_output/slopes_V`foldno'.dta", replace

sum T*
*sum S*


sum T4355lp_wbwg25ce_rshare T5543lp_wbwg25ce_rshare
sum T4355lp_wbwg25cen_rshare T5543lp_wbwg25cen_rshare
sum T4355lp_extrapolated T5543lp_extrapolated




***************************************************************************************
*(3) Merge prices and slopes and calculate covariances
***************************************************************************************


* a) Prep prices:
* Turns the wide price file into one row per district x item and stores it
* as delta_prices_V`foldno'_long.dta in the output folder.

use "$new_input_price/d_ln_p_goods_i_55_43.dta", clear
tab sector
* rural observations only (sector is a string variable in this file)
drop if sector != "Rural"
drop sector

sort state43 district43 good_i
/* We should have 407 rural markets, no missing obs:
sum sum_demo_shares_*
sum sum_pluto_shares_*
sum d_ln_p_pluto_55_43_* 
sum d_ln_p_demo_55_43_*
*/

* one row per district: average every weight, share and price-change column
local wide_vars dist_wt sum_demo_shares_* sum_pluto_shares_* d_ln_p_pluto_* d_ln_p_demo_*
collapse (mean) `wide_vars', by(state43 district43)
sort state43 district43
compress

count
* item columns become rows; item_id takes the numeric suffix of each column
reshape long sum_demo_shares_ sum_pluto_shares_ d_ln_p_pluto_55_43_ d_ln_p_demo_55_43_ , ///
	i(state43 district43 dist_wt) j(item_id)

count
* ok: count=407*34=13838
sum d_ln_p* sum_*
* no missing obs

* here, assuming that prices are for 55 relative to 43, given that prices have increased:
foreach idx in pluto demo {
	rename d_ln_p_`idx'_55_43_ d_ln_p_`idx'_4355
}

* the 5543 direction reuses the very same price change
foreach idx in pluto demo {
	gen d_ln_p_`idx'_5543 = d_ln_p_`idx'_4355
}

save "$new_output/delta_prices_V`foldno'_long.dta", replace



*_______________________________________________________________________________

* b) Merge with slopes:
* For each direction this loop writes two files to the output folder:
*   bias_by_good_<direction>.dta   - one row per market x decile x item
*   bias_1st_order_<direction>.dta - one row per market x decile


*Set initial or end engel curve as starting point (one of the two pairs)
************************************************************************



foreach direction in 4355 5543 {

	* first two digits = starting round, last two digits = end round
	local initial = substr("`direction'", 1, 2)
	local final = substr("`direction'", 3, 2)



	use "$new_output/slopes_V`foldno'.dta",clear

	*NOTE: P is the price index change, S is slope at origin for that decile and T is slope at crossing point

	*generate inverse slopes:
	foreach variant in wbwg25cen_rshare wbwg25ce_rshare extrapolated {
		foreach d in 4355 5543 {
			gen inv_T`d'lp_`variant' = 1/T`d'lp_`variant'
		}
	}

	*Assemble datasets:
	merge m:1 state43 district43 item_id using "$new_output/delta_prices_V`foldno'_long.dta"
	*ok, all _merge==3
	drop _merge

	merge 1:1 sector state43 district43 item_id decile using shares_and_slopes_V`foldno'
	*ok, all _merge==3
	drop _merge



	*set group definition:
	*********************

	merge m:1 item_id using "$dropbox/replication_files/data/intermediate_data/hh_shares/item_codesV1_DM_G108_15.dta"

	drop _merge item_no
	compress

	*G2- foods unprocessed:
	*G2: 1 2 3 5 6 9 11 15 17 20 21 22 25 26 27 28

	*G3, slightly processed:
	*G3: 10 12 16 18 19 23 24 29 30 31 32 33 34

	*G5: light-fuel:
	*G5: 4 7 8 13 14

	* NOTE: sum_demo and sum_pluto are variable-name abbreviations, so this
	* line (and the keep further down) needs varabbrev to be on.
	order  market_id state43 district43 sector decile item_* G_15 ///
	dist_wt sum_demo sum_pluto d_ln_p* inv_T* ///
	nP4355lp_wbwg25cen_rshare nP5543lp_wbwg25cen_rshare






	foreach type in demo  {

		* Different types of averages:
		* share-weighted mean price change within market x decile x group
		egen tot_all_`type'_`direction' = total(r`initial'lp_wbwg25_rshare), by(market_id decile G_15)
		egen aux_all_`type'_`direction' = total(r`initial'lp_wbwg25_rshare * d_ln_p_`type'_`direction'), by(market_id decile G_15)
		gen P_av_all_`type'_`direction' = aux_all_`type'_`direction' / tot_all_`type'_`direction'
		drop aux*
	}
	* end loop across types


	* Computing Std Dev of relative prices to check quality of 1st order approximation:
	egen devlogP_demo_`direction' = sd(d_ln_p_demo_`direction' - P_av_all_demo_`direction'), by(market_id decile G_15)
	sum devlogP*

	*Conditioning on having a match (and thus conditioning on double-monotonicity):
	* Note: it's symmetric: nP5543lp_wbwg25cen_rshare = nP4355lp_wbwg25cen_rshare

	keep if nP`direction'lp_wbwg25cen_rshare==1

	*Local for dependent variable:
	local slope_var inv_T`direction'lp_extrapolated 


	*Adjusting negative expenditureshares:
	replace r43lp_wbwg25_rshare = 0 if r43lp_wbwg25_rshare < 0
	replace r55lp_wbwg25_rshare = 0 if r55lp_wbwg25_rshare < 0

	*Need to count how many goods by market and decile we have:
	* market x decile cells with two or fewer usable slopes are dropped
	gen one_r`direction'=1 if `slope_var'!=.
	egen count_goods_r`direction'=total(one_r`direction'), by(market_id decile)
	drop if count_goods_r`direction'<=2



	**************************************
	*Calculate 1st-order correction terms:
	**************************************


	foreach type in demo  {

		* per good: minus share x (price change relative to the group mean) x inverse slope
		gen bias_bygood_full_`type'_`direction'    = - r`initial'lp_wbwg25_rshare * (d_ln_p_`type'_`direction' - P_av_all_`type'_`direction'    ) * `slope_var'

	}
	* end loop across types

	keep market_id state43 district43 sector decile item_id G_15 sum_demo bias_* r`initial'total_wt tot_* r`initial'lp_* `slope_var' count* devlogP*
	sort market_id state43 district43 sector decile G_15 item_id 



	sort market_id decile G_15 item_id  
	* paths are quoted so that a project root containing spaces still works
	save "${new_output}/bias_by_good_`direction'.dta", replace



	**************************
	* Average across goods:

	use "${new_output}/bias_by_good_`direction'.dta", clear

	foreach type in demo  {

		egen aux3_full_`type'_`direction'    = total(bias_bygood_full_`type'_`direction'   ), by(market_id decile G_15)

		* Different types of measures of bias:
		* group total of the per-good terms divided by the number of goods
		* counted per market x decile
		gen bias_full_`type'_`direction'    = aux3_full_`type'_`direction'    / count_goods_r`direction' 


		drop aux*
	}
	* end loop across types

	drop bias_bygood_*
	drop item_id G_15

	* keeps the first row of each market x decile in the sort order set above
	duplicates drop market_id decile, force

	sort market_id decile 
	save "${new_output}/bias_1st_order_`direction'.dta", replace


}
* end loop across directions


* closes Stata itself, discarding the data in memory
exit, clear STATA









